 // UKPVSIdeology
* Close any log left open by an earlier (possibly failed) run so that
* -log using- below does not stop with "log file already open".
capture log close
log using UKPVSIdeology.log, replace

* Data: Prosser, Magasin, Proulx and Haddock, UK Progressive Values Dataset,
* September 2024. Accessed on March 20 2025.
* -clear- lets the do-file be re-run when modified data are still in memory.
use "C:\Users\sbstjp\OneDrive - Cardiff University\UKPVS.dta", clear

* Base weight: every respondent starts at 1; this is the input to the raking step
generate weight = 1

* Collapse ageNew into five bands (the other weighting variables are already categorical)
recode ageNew                   ///
    (min/24 = 1 "0-24")         ///
    (25/34  = 2 "25-34")        ///
    (35/44  = 3 "35-44")        ///
    (45/54  = 4 "45-54")        ///
    (55/max = 5 "55+"),         ///
    generate(age_group)

* Target margins for the weights. These are based on BESW29, because that
* dataset has political self-identification, unlike census data.

* Gender: codes 3-5 are set to missing, leaving 1 = Male and 2 = Female
replace gender = . if gender >= 3 & gender <= 5
rename gender FemaleGender

* Both remaining gender codes get a target share of 0.50; everything else stays missing
generate sextot = 0.50 if inlist(FemaleGender, 1, 2)

* Target share for each age band; the n-th entry belongs to age_group == n
* (bands in order: up to 24, 25-34, 35-44, 45-54, 55+)
generate agetot = .
local age_shares 0.13 0.14 0.19 0.19 0.35
forvalues band = 1/5 {
    local share : word `band' of `age_shares'
    replace agetot = `share' if age_group == `band'
}

* Target share for each ethnicity code; the n-th entry belongs to Ethnicity2 == n
* (order: 1 White, 2 Asian, 3 Black, 4 Mixed). Other codes stay missing.
generate ethtot = .
local eth_shares 0.86 0.06 0.04 0.04
forvalues code = 1/4 {
    local share : word `code' of `eth_shares'
    replace ethtot = `share' if Ethnicity2 == `code'
}

* Collapse Education into three bands:
*   1 = university diploma and below (codes 1-15)
*   2 = undergraduate degree         (code 16)
*   3 = postgraduate and above       (codes 17-18)
* Any other value (including missing) is left missing.
generate edcats = cond(inrange(Education, 1, 15), 1,  ///
                  cond(Education == 16, 2,            ///
                  cond(inrange(Education, 17, 18), 3, .)))

* Target share for each education band; the n-th entry belongs to edcats == n
generate edtot = .
local ed_shares 0.4740 0.2969 0.2291
forvalues band = 1/3 {
    local share : word `band' of `ed_shares'
    replace edtot = `share' if edcats == `band'
}

* Collapse Household_Income into four bands:
*   1 = under 30k  (codes 1-6)
*   2 = 30-60k     (codes 7-11)
*   3 = 60-100k    (codes 12, 13)
*   4 = over 100k  (codes 14, 15)
* Any other value (including missing) is left missing.
generate inccats = cond(inrange(Household_Income, 1, 6), 1,    ///
                   cond(inrange(Household_Income, 7, 11), 2,   ///
                   cond(inlist(Household_Income, 12, 13), 3,   ///
                   cond(inlist(Household_Income, 14, 15), 4, .))))

* Target share for each income band; the n-th entry belongs to inccats == n
generate inctot = .
local inc_shares 0.3806 0.3524 0.1922 0.0747
forvalues band = 1/4 {
    local share : word `band' of `inc_shares'
    replace inctot = `share' if inccats == `band'
}

* Rake the base weight with the user-written survwgt package.
* Each margin variable is paired, in order, with the variable holding its target shares;
* the adjusted weight is stored in rakedweight.
local margins FemaleGender age_group Ethnicity2 edcats inccats
local targets sextot agetot ethtot edtot inctot
survwgt rake weight, by(`margins') totvars(`targets') generate(rakedweight)

// Liberal Values
* Flip the five liberal-authoritarian items so that liberalism is coded high.
* Each reversed item is (observed maximum - value), stored as r<item>.
forvalues i = 1/5 {
    local item LiberalAuthorit_al`i'
    quietly summarize `item'
    local top = r(max)
    generate r`item' = `top' - `item'
}

* Rescale each reversed item to run from 1 to 2, stored as s<item>.
* The range deliberately avoids 0, because 0 is used as the marker for a
* missing response when the scale score is built.
tempname lo width
forvalues i = 1/5 {
    local item rLiberalAuthorit_al`i'
    summarize `item'
    scalar `lo'    = r(min)
    scalar `width' = r(max) - r(min)
    generate s`item' = 1 + (`item' - `lo') / `width'
}

* Build LibValues as the mean of the answered standardised items.
* Missing items are recoded to 0 so they add nothing to the running total.
* Valid values lie in 1-2, so a 0 can only mean "missing" and is left out
* of the count of answered items.
generate total_scoreAL = 0
generate count_nonzeroAL = 0

forvalues i = 1/5 {
    local item srLiberalAuthorit_al`i'
    // mark a missing response with 0
    replace `item' = 0 if missing(`item')
    // accumulate the sum and the number of answered items
    replace total_scoreAL = total_scoreAL + `item'
    replace count_nonzeroAL = count_nonzeroAL + (`item' != 0)
}

* Mean over answered items; respondents with no answered item stay missing
generate LibValues = total_scoreAL / count_nonzeroAL if count_nonzeroAL > 0

// Reverse PolitID so that left is coded high
* reversed value = (observed maximum + 1) - value, so the top observed code becomes 1
quietly summarize PolitID, meanonly
generate PoliticalID = r(max) + 1 - PolitID

// Correlations
* Weighted pairwise correlations, with significance levels, between the three measures
local measures PVS LibValues PoliticalID
pwcorr `measures' [aweight=rakedweight], sig

// Ideological constraint
* Graduate dummy: 0 = Education codes 1-15, 1 = Education codes 16-18.
* Any other value (including missing) is left missing.
gen Graduate = .
replace Graduate = 0 if inrange(Education, 1, 15)
replace Graduate = 1 if inrange(Education, 16, 18)

* Weighted pairwise correlations among the four PVS sub-scales
local pvs_items PVS_MD PVS_REI_Reversed PVS_CAC PVS_PC

* 1) full sample
pwcorr `pvs_items' [aweight=rakedweight], sig

* 2) graduates only
pwcorr `pvs_items' if Graduate==1 [aweight=rakedweight], sig

* 3) respondents with PoliticalID above 1.
*    Stata treats missing as larger than any number, so a bare
*    "PoliticalID>1" would also include respondents with no PoliticalID;
*    they are excluded explicitly here.
*    NOTE(review): confirm that ">1" is the intended cut-off for this subgroup.
pwcorr `pvs_items' if PoliticalID>1 & !missing(PoliticalID) [aweight=rakedweight], sig

log close